mirror of
https://github.com/deepset-ai/haystack.git
synced 2026-01-08 13:06:29 +00:00
refactoring: reimplement Docker strategy (#3162)
* setup base images * add cpu flavor * use the same Dockerfile for cpu and gpu * better naming, add docs * add docker workflow * add missing image input * change cwd for bake * also push api images * try conditional tagging for releases * revert testing code * update docker readme * document variable override * use Python 3.10 * allow empty HAYSTACK_EXTRAS * Apply suggestions from code review Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai> * remove repo description step, can't make it work so far * add docs to the last step as it's tricky * manage tags for the newest images * tests are passing, checking in the last bit Co-authored-by: Sara Zan <sara.zanzottera@deepset.ai>
This commit is contained in:
parent
21aedc644f
commit
64b0c43885
93
.github/workflows/docker_release.yml
vendored
Normal file
93
.github/workflows/docker_release.yml
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
name: Release Docker images
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
tags:
|
||||
- v*
|
||||
|
||||
env:
|
||||
DOCKER_REPO_NAME: deepset/haystack
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
# Authenticate against Docker Hub so the later bake steps can push images.
# Uses the same major version generation as the other Docker actions in this job.
- name: Login to DockerHub
  uses: docker/login-action@v2
  with:
    username: ${{ secrets.DOCKER_HUB_USER }}
    password: ${{ secrets.DOCKER_HUB_TOKEN }}
|
||||
|
||||
# Compute image metadata; later steps read `steps.meta.outputs.version`.
- name: Docker meta
  id: meta
  uses: docker/metadata-action@v4
  with:
    # `with:` inputs are not expanded by a shell, so the workflow-level
    # variable has to be read through the `env` context.
    images: ${{ env.DOCKER_REPO_NAME }}
|
||||
|
||||
# Build and push every target of the `base` bake group, tagging the images
# with the version computed by the `meta` step.
- name: Build base images
  uses: docker/bake-action@v2
  with:
    push: true
    targets: base
    workdir: docker
  env:
    IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
|
||||
|
||||
# Build and push every target of the `api` bake group. The base image tag
# suffix is set to the same version used by the base images step.
- name: Build api images
  uses: docker/bake-action@v2
  with:
    push: true
    targets: api
    workdir: docker
  env:
    BASE_IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
    IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
|
||||
|
||||
# Tag builds only: fetch the latest release of this repository, leaving out
# prereleases and drafts. The result is read by the comparison step as
# `steps.latest-version.outputs.release`.
# NOTE(review): the action is referenced by branch (`@master`), not by a
# fixed version.
- name: Get latest version of Haystack
  id: latest-version
  if: startsWith(github.ref, 'refs/tags/')
  uses: pozetroninc/github-action-get-latest-release@master
  with:
    excludes: prerelease, draft
    repository: ${{ github.repository }}
|
||||
|
||||
# Tag builds only: compare the tag being built against the latest release.
# Later steps branch on `steps.version.outputs.comparison-result`.
# NOTE(review): the action is referenced as `@latest`, not a fixed version.
- name: Compare current version with latest
  id: version
  if: startsWith(github.ref, 'refs/tags/')
  uses: madhead/semver-utils@latest
  with:
    # Version being built
    version: ${{ github.ref_name }}
    # Compare to latest
    compare-to: ${{ steps.latest-version.outputs.release }}
|
||||
|
||||
# Log the comparison outcome when the tag being built is newer than the
# latest release.
- name: Use latest
  if: steps.version.outputs.comparison-result == '>'
  env:
    # Pass the values through the environment instead of interpolating them
    # into the script: the comparison result is `>` whenever this step runs,
    # and an unquoted `echo >;` is a shell syntax error that fails the job.
    COMPARISON_RESULT: ${{ steps.version.outputs.comparison-result }}
    LATEST_RELEASE: ${{ steps.latest-version.outputs.release }}
  run: |
    echo "$COMPARISON_RESULT"
    echo "$LATEST_RELEASE"
|
||||
|
||||
# Runs only when the version being built is newer than the latest release
# (intended for new minor releases), so the most recent image is also
# published under a tag without the version number.
# For example, if the previous build step produced `deepset/haystack:cpu-1.8.0`,
# this one produces `deepset/haystack:cpu`.
- name: Build api images no version in tag
  if: steps.version.outputs.comparison-result == '>'
  uses: docker/bake-action@v2
  with:
    push: true
    targets: api-latest
    workdir: docker
  env:
    BASE_IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
    IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
|
||||
13
docker/Dockerfile.api
Normal file
13
docker/Dockerfile.api
Normal file
@ -0,0 +1,13 @@
|
||||
# Tag of the deepset/haystack base image this API image is built on.
# It has no default, so it must be supplied as a build argument.
ARG base_image_tag

FROM deepset/haystack:${base_image_tag}

# The /file-upload API endpoint stores files here; the folder is writable by any user
RUN mkdir -p /opt/file-upload \
    && chmod 777 /opt/file-upload

# Point rest_api at the upload folder
ENV FILE_UPLOAD_PATH=/opt/file-upload

EXPOSE 8000

# Serve the REST API with a single uvicorn worker managed by gunicorn
CMD ["gunicorn", "rest_api.application:app", \
     "--bind", "0.0.0.0", \
     "--worker-class", "uvicorn.workers.UvicornWorker", \
     "--workers", "1", \
     "--timeout", "180"]
|
||||
38
docker/Dockerfile.base
Normal file
38
docker/Dockerfile.base
Normal file
@ -0,0 +1,38 @@
|
||||
# Images used by the two stages below. Both must be supplied as build
# arguments. `base_immage` keeps its existing spelling because callers pass
# the build argument under that exact name.
ARG build_image
ARG base_immage

# ---- Build stage: compile and install everything into /opt/venv ----
FROM $build_image AS build-image

# Git ref of Haystack to install, the pip extras to enable (e.g. "[ocr]"),
# and the wheel index URL used to locate torch-scatter.
ARG haystack_version
ARG haystack_extras
ARG torch_scatter

# Build tooling and native libraries; the apt lists are removed in the same
# layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        gcc \
        git \
        libtesseract-dev \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install PDF converter. `-f` makes curl fail on HTTP errors instead of
# saving an error page; the archive and the extracted folder are both removed.
RUN curl -fsSLO https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && \
    tar -xvf xpdf-tools-linux-4.04.tar.gz && \
    cp xpdf-tools-linux-4.04/bin64/pdftotext /opt && \
    rm -rf xpdf-tools-linux-4.04 xpdf-tools-linux-4.04.tar.gz

# Shallow clone Haystack repo, we'll install from the local sources
RUN git clone --depth=1 --branch="${haystack_version}" https://github.com/deepset-ai/haystack.git /opt/haystack
WORKDIR /opt/haystack

# Use a virtualenv we can copy over the next build stage
RUN python -m venv --system-site-packages /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# The extras spec contains square brackets, so it is quoted to keep the shell
# from treating it as a glob pattern.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir ".${haystack_extras}" && \
    pip install --no-cache-dir ./rest_api && \
    pip install --no-cache-dir torch-scatter -f "$torch_scatter"

# ---- Final stage: only the virtualenv and the pdftotext binary are kept ----
FROM $base_immage AS final

COPY --from=build-image /opt/venv /opt/venv
COPY --from=build-image /opt/pdftotext /usr/local/bin

ENV PATH="/opt/venv/bin:$PATH"
|
||||
49
docker/README.md
Normal file
49
docker/README.md
Normal file
@ -0,0 +1,49 @@
|
||||
# Haystack Docker image
|
||||
|
||||
Haystack is an end-to-end framework that enables you to build powerful and production-ready
|
||||
pipelines for different search use cases. The Docker image comes with a web service
|
||||
configured to serve Haystack's `rest_api` to ease pipelines' deployments in containerized
|
||||
environments.
|
||||
|
||||
Start the Docker container binding the TCP port `8000` locally:
|
||||
```sh
|
||||
docker run -p 8000:8000 deepset/haystack
|
||||
```
|
||||
|
||||
If you need the container to access other services available in the host:
|
||||
```sh
|
||||
docker run -p 8000:8000 --network="host" deepset/haystack
|
||||
```
|
||||
|
||||
## Image variants
|
||||
|
||||
The Docker image comes in two variants:
|
||||
- `haystack:cpu-<version>`: this image is smaller but doesn't support GPU
|
||||
- `haystack:gpu-<version>`: this image comes with the CUDA runtime and is capable of running on GPUs
|
||||
|
||||
|
||||
## Image development
|
||||
|
||||
Images are built with BuildKit and we use `bake` to orchestrate the process.
|
||||
You can build a specific image by simply running:
|
||||
```sh
|
||||
docker buildx bake gpu
|
||||
```
|
||||
|
||||
You can override any `variable` defined in the `docker-bake.hcl` file and build custom
|
||||
images, for example if you want to use a branch from the Haystack repo:
|
||||
```sh
|
||||
HAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake gpu --no-cache
|
||||
```
|
||||
|
||||
# License
|
||||
|
||||
View [license information](https://github.com/deepset-ai/haystack/blob/main/LICENSE) for
|
||||
the software contained in this image.
|
||||
|
||||
As with all Docker images, these likely also contain other software which may be under
|
||||
other licenses (such as Bash, etc. from the base distribution, along with any direct or
|
||||
indirect dependencies of the primary software being contained).
|
||||
|
||||
As for any pre-built image usage, it is the image user's responsibility to ensure that any
|
||||
use of this image complies with any relevant licenses for all software contained within.
|
||||
94
docker/docker-bake.hcl
Normal file
94
docker/docker-bake.hcl
Normal file
@ -0,0 +1,94 @@
|
||||
# Git ref of Haystack handed to the base images as `haystack_version`.
variable "HAYSTACK_VERSION" {
  default = "main"
}

# NOTE(review): not referenced by any target in this file.
variable "GITHUB_REF" {
  default = ""
}

# Repository part of every image tag produced by this file.
variable "IMAGE_NAME" {
  default = "deepset/haystack"
}

# Suffix appended to the tags of the images being built.
variable "IMAGE_TAG_SUFFIX" {
  default = "local"
}

# Suffix of the base image tag the api images are built on.
variable "BASE_IMAGE_TAG_SUFFIX" {
  default = "local"
}

# Optional override of the pip extras; when empty, each base target falls
# back to its own default list.
variable "HAYSTACK_EXTRAS" {
  default = ""
}

group "base" {
  targets = ["base", "base-gpu"]
}

group "api" {
  targets = ["cpu", "gpu"]
}

group "api-latest" {
  targets = ["cpu-latest", "gpu-latest"]
}

group "all" {
  targets = ["base", "base-gpu", "cpu", "gpu"]
}

target "docker-metadata-action" {}

# CPU base image
target "base" {
  dockerfile = "Dockerfile.base"
  tags       = ["${IMAGE_NAME}:base-${IMAGE_TAG_SUFFIX}"]
  args = {
    build_image      = "python:3.10-slim"
    base_immage      = "python:3.10-slim"
    haystack_version = HAYSTACK_VERSION
    haystack_extras  = notequal("", HAYSTACK_EXTRAS) ? HAYSTACK_EXTRAS : "[docstores,crawler,preprocessing,ocr,onnx,beir]"
    torch_scatter    = "https://data.pyg.org/whl/torch-1.12.0+cpu.html"
  }
}

# GPU base image
target "base-gpu" {
  dockerfile = "Dockerfile.base"
  tags       = ["${IMAGE_NAME}:base-gpu-${IMAGE_TAG_SUFFIX}"]
  args = {
    build_image      = "pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
    base_immage      = "pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
    haystack_version = HAYSTACK_VERSION
    haystack_extras  = notequal("", HAYSTACK_EXTRAS) ? HAYSTACK_EXTRAS : "[docstores-gpu,crawler,preprocessing,ocr,onnx-gpu,beir]"
    torch_scatter    = "https://data.pyg.org/whl/torch-1.12.1%2Bcu113.html"
  }
}

# API image on top of the CPU base image
target "cpu" {
  dockerfile = "Dockerfile.api"
  tags       = ["${IMAGE_NAME}:cpu-${IMAGE_TAG_SUFFIX}"]
  args = {
    base_image_tag = "base-${BASE_IMAGE_TAG_SUFFIX}"
  }
}

# Same build as `cpu`, tagged without a version suffix
target "cpu-latest" {
  inherits = ["cpu"]
  tags     = ["${IMAGE_NAME}:cpu"]
}

# API image on top of the GPU base image
target "gpu" {
  dockerfile = "Dockerfile.api"
  tags       = ["${IMAGE_NAME}:gpu-${IMAGE_TAG_SUFFIX}"]
  args = {
    base_image_tag = "base-gpu-${BASE_IMAGE_TAG_SUFFIX}"
  }
  platforms = ["linux/amd64"]
}

# Same build as `gpu`, tagged without a version suffix
target "gpu-latest" {
  inherits = ["gpu"]
  tags     = ["${IMAGE_NAME}:gpu"]
}
|
||||
Loading…
x
Reference in New Issue
Block a user