diff --git a/.github/workflows/docker_release.yml b/.github/workflows/docker_release.yml
new file mode 100644
index 000000000..53cbec6fc
--- /dev/null
+++ b/.github/workflows/docker_release.yml
@@ -0,0 +1,104 @@
+name: Release Docker images
+
+# Build and push on manual dispatch, pushes to main, and tags matching `v*`.
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    tags:
+      - v*
+
+env:
+  DOCKER_REPO_NAME: deepset/haystack
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Login to DockerHub
+        uses: docker/login-action@v1
+        with:
+          username: ${{ secrets.DOCKER_HUB_USER }}
+          password: ${{ secrets.DOCKER_HUB_TOKEN }}
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          # `with:` inputs are not shell-expanded, so the env context must be
+          # used here instead of `$DOCKER_REPO_NAME`.
+          images: ${{ env.DOCKER_REPO_NAME }}
+
+      - name: Build base images
+        uses: docker/bake-action@v2
+        env:
+          IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
+          # Build from the ref that triggered the run rather than the
+          # `main` default declared in docker-bake.hcl.
+          HAYSTACK_VERSION: ${{ github.ref_name }}
+        with:
+          workdir: docker
+          targets: base
+          push: true
+
+      - name: Build api images
+        uses: docker/bake-action@v2
+        env:
+          IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
+          BASE_IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
+        with:
+          workdir: docker
+          targets: api
+          push: true
+
+      - name: Get latest version of Haystack
+        id: latest-version
+        uses: pozetroninc/github-action-get-latest-release@master
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          repository: ${{ github.repository }}
+          excludes: prerelease, draft
+
+      - name: Compare current version with latest
+        uses: madhead/semver-utils@latest
+        id: version
+        if: startsWith(github.ref, 'refs/tags/')
+        with:
+          # Version being built
+          version: ${{ github.ref_name }}
+          # Compare to latest
+          compare-to: ${{ steps.latest-version.outputs.release }}
+
+      - name: Use latest
+        if: steps.version.outputs.comparison-result == '>'
+        # Pass the values through env and quote them: interpolating a bare
+        # `>` into the script would be parsed as a shell redirection.
+        env:
+          COMPARISON_RESULT: ${{ steps.version.outputs.comparison-result }}
+          LATEST_RELEASE: ${{ steps.latest-version.outputs.release }}
+        run: |
+          echo "$COMPARISON_RESULT"
+          echo "$LATEST_RELEASE"
+
+      # This step should only run when we release a new minor, so
+      # that we can tag the most recent image without the version number.
+      # For example, if the previous step builds `deepset/haystack:cpu-1.8.0`,
+      # this builds `deepset/haystack:cpu`
+      - name: Build api images no version in tag
+        uses: docker/bake-action@v2
+        if: steps.version.outputs.comparison-result == '>'
+        env:
+          IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
+          BASE_IMAGE_TAG_SUFFIX: ${{ steps.meta.outputs.version }}
+        with:
+          workdir: docker
+          targets: api-latest
+          push: true
diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api
new file mode 100644
index 000000000..1bd032d28
--- /dev/null
+++ b/docker/Dockerfile.api
@@ -0,0 +1,13 @@
+ARG base_image_tag
+
+FROM deepset/haystack:${base_image_tag}
+
+# Create a folder for the /file-upload API endpoint with write permissions
+RUN mkdir -p /opt/file-upload && chmod 777 /opt/file-upload
+
+# Tell rest_api which folder to use for uploads
+ENV FILE_UPLOAD_PATH="/opt/file-upload"
+
+EXPOSE 8000
+
+CMD ["gunicorn", "rest_api.application:app", "-b", "0.0.0.0", "-k", "uvicorn.workers.UvicornWorker", "--workers", "1", "--timeout", "180"]
diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
new file mode 100644
index 000000000..42d99b81e
--- /dev/null
+++ b/docker/Dockerfile.base
@@ -0,0 +1,40 @@
+# Images for the build stage and the final stage; docker-bake.hcl sets both
+ARG build_image
+ARG base_image
+
+FROM $build_image AS build-image
+
+ARG haystack_version
+ARG haystack_extras
+ARG torch_scatter
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential gcc git curl \
+    tesseract-ocr libtesseract-dev poppler-utils
+
+# Install PDF converter
+RUN curl -O https://dl.xpdfreader.com/xpdf-tools-linux-4.04.tar.gz && \
+    tar -xvf xpdf-tools-linux-4.04.tar.gz && \
+    cp xpdf-tools-linux-4.04/bin64/pdftotext /opt && \
+    rm -rf xpdf-tools-linux-4.04
+
+# Shallow clone Haystack repo, we'll install from the local sources
+RUN git clone --depth=1 --branch=${haystack_version} https://github.com/deepset-ai/haystack.git /opt/haystack
+WORKDIR /opt/haystack
+
+# Use a virtualenv we can copy over the next build stage
+RUN python -m venv --system-site-packages /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+RUN pip install --upgrade pip && \
+    pip install --no-cache-dir .${haystack_extras} && \
+    pip install --no-cache-dir ./rest_api && \
+    pip install --no-cache-dir torch-scatter -f $torch_scatter
+
+# Final stage: only the virtualenv and the pdftotext binary are copied over
+FROM $base_image AS final
+
+COPY --from=build-image /opt/venv /opt/venv
+COPY --from=build-image /opt/pdftotext /usr/local/bin
+
+ENV PATH="/opt/venv/bin:$PATH"
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 000000000..57a1f2d5b
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,49 @@
+# Haystack Docker image
+
+Haystack is an end-to-end framework that enables you to build powerful and production-ready
+pipelines for different search use cases. The Docker image comes with a web service
+configured to serve Haystack's `rest_api` to ease pipelines' deployments in containerized
+environments.
+
+Start the Docker container binding the TCP port `8000` locally:
+```sh
+docker run -p 8000:8000 deepset/haystack
+```
+
+If you need the container to access other services available in the host:
+```sh
+docker run -p 8000:8000 --network="host" deepset/haystack
+```
+
+## Image variants
+
+The Docker image comes in two variants:
+- `haystack:cpu-<version>`: this image is smaller but doesn't support GPU
+- `haystack:gpu-<version>`: this image comes with the CUDA runtime and is capable of running on GPUs
+
+
+## Image development
+
+Images are built with BuildKit and we use `bake` to orchestrate the process.
+You can build a specific image by simply running:
+```sh
+docker buildx bake gpu
+```
+
+You can override any `variable` defined in the `docker-bake.hcl` file and build custom
+images, for example if you want to use a branch from the Haystack repo:
+```sh
+HAYSTACK_VERSION=mybranch_or_tag BASE_IMAGE_TAG_SUFFIX=latest docker buildx bake gpu --no-cache
+```
+
+# License
+
+View [license information](https://github.com/deepset-ai/haystack/blob/main/LICENSE) for
+the software contained in this image.
+
+As with all Docker images, these likely also contain other software which may be under
+other licenses (such as Bash, etc from the base distribution, along with any direct or
+indirect dependencies of the primary software being contained).
+
+As for any pre-built image usage, it is the image user's responsibility to ensure that any
+use of this image complies with any relevant licenses for all software contained within.
\ No newline at end of file
diff --git a/docker/docker-bake.hcl b/docker/docker-bake.hcl
new file mode 100644
index 000000000..0dec4affd
--- /dev/null
+++ b/docker/docker-bake.hcl
@@ -0,0 +1,96 @@
+# Branch or tag of the Haystack repo that Dockerfile.base clones
+variable "HAYSTACK_VERSION" {
+  default = "main"
+}
+
+variable "GITHUB_REF" {
+  default = ""
+}
+
+variable "IMAGE_NAME" {
+  default = "deepset/haystack"
+}
+
+variable "IMAGE_TAG_SUFFIX" {
+  default = "local"
+}
+
+variable "BASE_IMAGE_TAG_SUFFIX" {
+  default = "local"
+}
+
+# Extras passed to pip; when empty, each base target uses its own default list
+variable "HAYSTACK_EXTRAS" {
+  default = ""
+}
+
+group "base" {
+  targets = ["base", "base-gpu"]
+}
+
+group "api" {
+  targets = ["cpu", "gpu"]
+}
+
+group "api-latest" {
+  targets = ["cpu-latest", "gpu-latest"]
+}
+
+group "all" {
+  targets = ["base", "base-gpu", "cpu", "gpu"]
+}
+
+target "docker-metadata-action" {}
+
+target "base" {
+  dockerfile = "Dockerfile.base"
+  tags = ["${IMAGE_NAME}:base-${IMAGE_TAG_SUFFIX}"]
+  args = {
+    build_image = "python:3.10-slim"
+    base_image = "python:3.10-slim"
+    haystack_version = "${HAYSTACK_VERSION}"
+    haystack_extras = notequal("",HAYSTACK_EXTRAS) ? "${HAYSTACK_EXTRAS}" : "[docstores,crawler,preprocessing,ocr,onnx,beir]"
+    torch_scatter = "https://data.pyg.org/whl/torch-1.12.0+cpu.html"
+  }
+}
+
+target "base-gpu" {
+  dockerfile = "Dockerfile.base"
+  tags = ["${IMAGE_NAME}:base-gpu-${IMAGE_TAG_SUFFIX}"]
+  args = {
+    build_image = "pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
+    base_image = "pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
+    haystack_version = "${HAYSTACK_VERSION}"
+    haystack_extras = notequal("",HAYSTACK_EXTRAS) ? "${HAYSTACK_EXTRAS}" : "[docstores-gpu,crawler,preprocessing,ocr,onnx-gpu,beir]"
+    torch_scatter = "https://data.pyg.org/whl/torch-1.12.1%2Bcu113.html"
+  }
+}
+
+target "cpu" {
+  dockerfile = "Dockerfile.api"
+  tags = ["${IMAGE_NAME}:cpu-${IMAGE_TAG_SUFFIX}"]
+  args = {
+    base_image_tag = "base-${BASE_IMAGE_TAG_SUFFIX}"
+  }
+}
+
+target "cpu-latest" {
+  inherits = ["cpu"]
+  tags = ["${IMAGE_NAME}:cpu"]
+}
+
+target "gpu" {
+  dockerfile = "Dockerfile.api"
+  tags = ["${IMAGE_NAME}:gpu-${IMAGE_TAG_SUFFIX}"]
+  args = {
+    base_image_tag = "base-gpu-${BASE_IMAGE_TAG_SUFFIX}"
+  }
+  platforms = [
+    "linux/amd64"
+  ]
+}
+
+target "gpu-latest" {
+  inherits = ["gpu"]
+  tags = ["${IMAGE_NAME}:gpu"]
+}