mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-18 11:04:35 +00:00
ci: Move xpdf build into separate container (#4199)
* Create Dockerfile and hcl config to build Xpdf * Create workflow to build Xpdf Docker image * Update Dockerfile.base to not build Xpdf * Fix CWD removal and arg casing * Fix ARG setting
This commit is contained in:
parent
aaa1522c45
commit
30cdb81f19
39
.github/workflows/xpdf_release.yml
vendored
Normal file
39
.github/workflows/xpdf_release.yml
vendored
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Builds the Xpdf image defined by docker/docker-bake-xpdf.hcl and pushes it to
# Docker Hub. Runs only when the bake file or docker/Dockerfile.xpdf changes on main.
name: Xpdf Docker image release

on:
  push:
    branches:
      - main
    paths:
      - docker/docker-bake-xpdf.hcl
      - docker/Dockerfile.xpdf

jobs:
  publish-xpdf-image:
    runs-on: ubuntu-latest
    env:
      # NOTE(review): no step in this workflow reads DOCKER_REPO_NAME; kept in
      # case something outside this file relies on it. Confirm before removing.
      DOCKER_REPO_NAME: deepset/xpdf

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # Emulation for building target platforms that differ from the runner's.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      # Same major version as the other docker/* actions used in this job.
      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USER }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      # `files` is resolved relative to `workdir`, i.e. docker/docker-bake-xpdf.hcl.
      - name: Build and publish Xpdf image
        uses: docker/bake-action@v2
        with:
          files: "docker-bake-xpdf.hcl"
          workdir: docker
          targets: xpdf
          push: true
|
||||||
@ -7,20 +7,6 @@ ARG DEBIAN_FRONTEND=noninteractive
|
|||||||
ARG haystack_version
|
ARG haystack_version
|
||||||
ARG haystack_extras
|
ARG haystack_extras
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
build-essential gcc git curl cmake \
|
|
||||||
tesseract-ocr libtesseract-dev poppler-utils
|
|
||||||
|
|
||||||
# Install PDF converter
|
|
||||||
RUN curl -O https://dl.xpdfreader.com/xpdf-4.04.tar.gz && \
|
|
||||||
tar -xvf xpdf-4.04.tar.gz && \
|
|
||||||
cd xpdf-4.04 && \
|
|
||||||
cmake . && \
|
|
||||||
make && \
|
|
||||||
cp xpdf/pdftotext /opt && \
|
|
||||||
cd .. && \
|
|
||||||
rm -rf xpdf-4.04
|
|
||||||
|
|
||||||
# Shallow clone Haystack repo, we'll install from the local sources
|
# Shallow clone Haystack repo, we'll install from the local sources
|
||||||
RUN git clone --depth=1 --branch=${haystack_version} https://github.com/deepset-ai/haystack.git /opt/haystack
|
RUN git clone --depth=1 --branch=${haystack_version} https://github.com/deepset-ai/haystack.git /opt/haystack
|
||||||
WORKDIR /opt/haystack
|
WORKDIR /opt/haystack
|
||||||
@ -37,7 +23,8 @@ RUN pip install --upgrade pip && \
|
|||||||
FROM $base_image AS final
|
FROM $base_image AS final
|
||||||
|
|
||||||
COPY --from=build-image /opt/venv /opt/venv
|
COPY --from=build-image /opt/venv /opt/venv
|
||||||
COPY --from=build-image /opt/pdftotext /usr/local/bin
|
COPY --from=deepset/xpdf:latest /opt/pdftotext /usr/local/bin
|
||||||
|
|
||||||
# pdftotext requires fontconfig runtime
|
# pdftotext requires fontconfig runtime
|
||||||
RUN apt-get update && apt-get install -y libfontconfig && rm -rf /var/lib/apt/lists/*
|
RUN apt-get update && apt-get install -y libfontconfig && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
|||||||
22
docker/Dockerfile.xpdf
Normal file
22
docker/Dockerfile.xpdf
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
# Builds the Xpdf `pdftotext` binary from source and leaves it at /opt/pdftotext,
# so other images can pull it with `COPY --from=<this image> /opt/pdftotext ...`.

# Pinned release instead of `latest` so rebuilds are reproducible.
FROM ubuntu:22.04

# Build-time only: keeps apt from prompting during package configuration.
ARG DEBIAN_FRONTEND=noninteractive

# ca-certificates is listed explicitly: with --no-install-recommends it is not
# expected to be pulled in implicitly, and curl needs it for the HTTPS download.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        gcc \
        git \
        libtesseract-dev \
        poppler-utils \
        tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Xpdf release to build. The default lets a plain `docker build` work; callers
# can still override it with a build arg.
ARG xpdf_version=4.04

# Download, build, keep only the pdftotext binary, then clean up.
# -f makes curl fail on HTTP errors instead of saving an error page as the tarball.
# Every step is chained with `&&`: previously `cd ..` was not, so the `rm -rf`
# was passed to `cd` as extra arguments and the source tree was never removed.
RUN curl -fsSL -O https://dl.xpdfreader.com/xpdf-${xpdf_version}.tar.gz && \
    tar -xvf xpdf-${xpdf_version}.tar.gz && \
    cd xpdf-${xpdf_version} && \
    cmake . && \
    make && \
    cp xpdf/pdftotext /opt && \
    cd .. && \
    rm -rf xpdf-${xpdf_version} xpdf-${xpdf_version}.tar.gz
|
||||||
12
docker/docker-bake-xpdf.hcl
Normal file
12
docker/docker-bake-xpdf.hcl
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
# Xpdf release to build; forwarded to the Dockerfile as the `xpdf_version` build arg.
variable "XPDF_VERSION" {
  default = "4.04"
}

# Multi-platform build of Dockerfile.xpdf, tagged as deepset/xpdf:latest.
target "xpdf" {
  dockerfile = "Dockerfile.xpdf"
  platforms  = ["linux/amd64", "linux/arm64"]
  tags       = ["deepset/xpdf:latest"]

  args = {
    xpdf_version = "${XPDF_VERSION}"
  }
}
|
||||||
Loading…
x
Reference in New Issue
Block a user