mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-11-03 03:09:28 +00:00
ci: Move xpdf build into separate container (#4199)
* Create Dockerfile and hcl config to build Xpdf
* Create workflow to build Xpdf Docker image
* Update Dockerfile.base to not build Xpdf
* Fix CWD removal and arg casing
* Fix ARG setting
This commit is contained in:
parent
aaa1522c45
commit
30cdb81f19
39
.github/workflows/xpdf_release.yml
vendored
Normal file
39
.github/workflows/xpdf_release.yml
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
# Builds the Xpdf image described by docker/docker-bake-xpdf.hcl and pushes it
# to Docker Hub. Runs only for pushes to `main` that touch the bake file or
# the Xpdf Dockerfile.
name: Xpdf Docker image release

on:
  push:
    branches:
      - main
    paths:
      - docker/docker-bake-xpdf.hcl
      - docker/Dockerfile.xpdf

jobs:
  publish-xpdf-image:
    runs-on: ubuntu-latest
    env:
      # NOTE(review): not referenced by any step below; the pushed tag comes
      # from the bake file. Confirm whether this is still needed.
      DOCKER_REPO_NAME: deepset/xpdf

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      # QEMU + Buildx are set up before the bake step so the build can run
      # through Buildx (the bake target lists more than one platform).
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      # Kept on the same major version as the other docker/* actions here.
      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_HUB_USER }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      # `workdir: docker` makes the bake file name and the Dockerfile path
      # inside it resolve relative to the docker/ directory.
      - name: Build and publish Xpdf image
        uses: docker/bake-action@v2
        with:
          files: "docker-bake-xpdf.hcl"
          workdir: docker
          targets: xpdf
          push: true
|
||||
@ -7,20 +7,6 @@ ARG DEBIAN_FRONTEND=noninteractive
|
||||
ARG haystack_version
|
||||
ARG haystack_extras
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
build-essential gcc git curl cmake \
|
||||
tesseract-ocr libtesseract-dev poppler-utils
|
||||
|
||||
# Install PDF converter
|
||||
RUN curl -O https://dl.xpdfreader.com/xpdf-4.04.tar.gz && \
|
||||
tar -xvf xpdf-4.04.tar.gz && \
|
||||
cd xpdf-4.04 && \
|
||||
cmake . && \
|
||||
make && \
|
||||
cp xpdf/pdftotext /opt && \
|
||||
cd .. && \
|
||||
rm -rf xpdf-4.04
|
||||
|
||||
# Shallow clone Haystack repo, we'll install from the local sources
|
||||
RUN git clone --depth=1 --branch=${haystack_version} https://github.com/deepset-ai/haystack.git /opt/haystack
|
||||
WORKDIR /opt/haystack
|
||||
@ -37,7 +23,8 @@ RUN pip install --upgrade pip && \
|
||||
FROM $base_image AS final
|
||||
|
||||
COPY --from=build-image /opt/venv /opt/venv
|
||||
COPY --from=build-image /opt/pdftotext /usr/local/bin
|
||||
COPY --from=deepset/xpdf:latest /opt/pdftotext /usr/local/bin
|
||||
|
||||
# pdftotext requires fontconfig runtime
|
||||
RUN apt-get update && apt-get install -y libfontconfig && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
22
docker/Dockerfile.xpdf
Normal file
22
docker/Dockerfile.xpdf
Normal file
@ -0,0 +1,22 @@
|
||||
# Build container for Xpdf: downloads the upstream source tarball, compiles it
# and leaves the `pdftotext` binary at /opt/pdftotext.
# NOTE(review): the base tag floats with `latest`; consider pinning a release
# so rebuilds are reproducible.
FROM ubuntu:latest

# Build-time only: keeps apt from waiting on interactive prompts.
ARG DEBIAN_FRONTEND=noninteractive

# Toolchain and libraries used by the build below.
# ca-certificates is listed explicitly: with --no-install-recommends it may not
# be pulled in, and curl needs it to verify the HTTPS download.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    curl \
    gcc \
    git \
    libtesseract-dev \
    poppler-utils \
    tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Xpdf release to build (for example 4.04); must be passed as a build argument.
ARG xpdf_version

# Fetch, build and install pdftotext, then clean up in the same layer.
# -f makes curl fail on an HTTP error instead of saving the error page.
# The `&&` after `cd ..` is required: without it the `rm` words are passed to
# `cd` as extra arguments and the source tree is never removed.
RUN curl -fSL -O https://dl.xpdfreader.com/xpdf-${xpdf_version}.tar.gz && \
    tar -xvf xpdf-${xpdf_version}.tar.gz && \
    cd xpdf-${xpdf_version} && \
    cmake . && \
    make && \
    cp xpdf/pdftotext /opt && \
    cd .. && \
    rm -rf xpdf-${xpdf_version} xpdf-${xpdf_version}.tar.gz
|
||||
12
docker/docker-bake-xpdf.hcl
Normal file
12
docker/docker-bake-xpdf.hcl
Normal file
@ -0,0 +1,12 @@
|
||||
# Xpdf release that gets built; can be overridden through the XPDF_VERSION
# environment variable when invoking bake.
variable "XPDF_VERSION" { default = "4.04" }
|
||||
|
||||
# Bake target for the Xpdf build image: built from Dockerfile.xpdf for amd64
# and arm64 and tagged deepset/xpdf:latest.
target "xpdf" {
  dockerfile = "Dockerfile.xpdf"
  platforms  = ["linux/amd64", "linux/arm64"]
  tags       = ["deepset/xpdf:latest"]

  # Forwarded to the Dockerfile's `ARG xpdf_version`.
  args = {
    xpdf_version = "${XPDF_VERSION}"
  }
}
|
||||
Loading…
x
Reference in New Issue
Block a user