From 450c3d44841fbf4d8920e34bb141150bff9d8d71 Mon Sep 17 00:00:00 2001
From: Massimiliano Pippi
Date: Thu, 22 Dec 2022 18:37:36 +0100
Subject: [PATCH] fix: build `pdftotext` from sources (#3746)

* build pdftotext from sources

* trigger the build on my own PR - to be reverted

* trigger the build on my own PR - to be reverted

* Update docker_release.yml
---
Review notes (text in this section is not part of the commit message):

- The hunk replaces the download of the prebuilt xpdf-tools bundle with a
  download of the xpdf 4.04 source tarball, builds it with cmake + make, and
  copies the resulting xpdf/pdftotext binary to /opt. cmake is added to the
  apt-get install list for that purpose.
- curl is invoked with -fsSLO instead of -O: -f turns an HTTP error into a
  failed build step at the download, -L follows redirects, and -sS drops the
  progress meter while still printing errors.
- The final rm -rf also deletes xpdf-4.04.tar.gz. Removing it in the same RUN
  keeps the archive out of the image layer; previously only the extracted
  tree was removed.
- Both changes stay on their existing lines, so the hunk header and the
  diffstat below are unchanged. The post-image blob id on the index line
  predates these two edits.
- NOTE(review): the tarball is still not checksum-verified -- TODO pin a
  sha256 for xpdf-4.04.tar.gz and check it before extracting.

 docker/Dockerfile.base | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
index edfda4095..e6a0b3f63 100644
--- a/docker/Dockerfile.base
+++ b/docker/Dockerfile.base
@@ -7,14 +7,18 @@ ARG haystack_version
 ARG haystack_extras
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential gcc git curl \
+    build-essential gcc git curl cmake \
     tesseract-ocr libtesseract-dev poppler-utils
 
 # Install PDF converter
-RUN curl -O https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && \
-    tar -xvf xpdf-tools-linux-4.04.tar.gz && \
-    cp xpdf-tools-linux-4.04/bin64/pdftotext /opt && \
-    rm -rf xpdf-tools-linux-4.04
+RUN curl -fsSLO https://dl.xpdfreader.com/xpdf-4.04.tar.gz && \
+    tar -xvf xpdf-4.04.tar.gz && \
+    cd xpdf-4.04 && \
+    cmake . && \
+    make && \
+    cp xpdf/pdftotext /opt && \
+    cd .. && \
+    rm -rf xpdf-4.04 xpdf-4.04.tar.gz
 
 # Shallow clone Haystack repo, we'll install from the local sources
 RUN git clone --depth=1 --branch=${haystack_version} https://github.com/deepset-ai/haystack.git /opt/haystack