feat: separate out preview tests (#5639)

* add preview workflows
* feedback
* feedback
* use preview extra
* remove coverage and add separate e2e
* rename workflow file for consistency
* trigger ci
* undo trigger
* torch import in testing
* add deps to unit tests
* feedback
* run container instead of service
* comment
* add if statement
* fix tika version
* separate out win integration tests
* separate out all CIs
* try installing docker on macos
* exclude tika
* remove tika docker
2025-11-26 23:15:59 +00:00 · 2023-09-29 13:16:08 +02:00 · 2023-09-29 13:16:08 +02:00 · 81b2e83d04
commit 81b2e83d04
parent d61df24b27
8 changed files with 410 additions and 6 deletions
--- a/.github/workflows/ci_metrics.yml
+++ b/.github/workflows/ci_metrics.yml
@ -4,8 +4,10 @@ on:
  workflow_run:
    workflows:
      - "end-to-end"
+      - "end-to-end (Preview)"
      - "Linting"
      - "Tests"
+      - "Tests (Preview)"
      - "REST API Tests"
    types:
      - completed
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@ -13,6 +13,7 @@ on:
      - ready_for_review
    paths:
      - "e2e/**/*.py"
+      - "!e2e/preview/**/*.py"   # See e2e_preview.yml
      - ".github/workflows/e2e.yml"

 env:
--- a/.github/workflows/e2e_preview.yml
+++ b/.github/workflows/e2e_preview.yml
@ -0,0 +1,42 @@
+# If you change this name also do it in ci_metrics.yml
+name: end-to-end (Preview)
+
+on:
+  workflow_dispatch: # Activate this workflow manually
+  schedule:
+    - cron: "0 0 * * *"  # every day at 00:00 (GitHub evaluates schedules in UTC)
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+    paths:  # PRs trigger this workflow only when preview e2e tests or this file change
+      - "e2e/preview/**/*.py"
+      - ".github/workflows/e2e_preview.yml"
+
+env:
+  PYTHON_VERSION: "3.8"  # read by the setup-python step
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}  # workflow-level env, visible to every step
+
+jobs:
+  run:  # single job: install Haystack with the preview extra, then run e2e/preview
+    timeout-minutes: 60  # cancel the job if it runs longer than an hour
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+
+    - uses: actions/setup-python@v4
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+
+    - name: Install dependencies  # apt-get with -y: stable CLI for scripts, never waits for a prompt
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y ffmpeg  # for local Whisper tests
+
+    - name: Install Haystack
+      run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'
+
+    - name: Run tests
+      run: pytest e2e/preview
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -17,6 +17,8 @@ on:
    paths:
      - "**.py"
      - "pyproject.toml"
+      - "!haystack/preview/**/*.py"  # See tests_preview.yml
+      - "!test/preview/**/*.py"  # See tests_preview.yml
      - "!.github/**/*.py"
      - "!rest_api/**/*.py"
      - "!docs/**/*.py"
--- a/.github/workflows/tests_preview.yml
+++ b/.github/workflows/tests_preview.yml
@ -0,0 +1,324 @@
+# If you change this name also do it in tests_preview_skipper.yml and ci_metrics.yml
+name: Tests (Preview)
+
+on:
+  workflow_dispatch: # Activate this workflow manually
+  push:  # no paths filter here: every push to the branches below runs the workflow
+    branches:
+      - main
+      # release branches have the form v1.9.x
+      - "v[0-9].*[0-9].x"
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+    paths:  # mirrored by paths-ignore in tests_preview_skipper.yml; keep both lists in sync
+      - "haystack/preview/**/*.py"
+      - "test/preview/**/*.py"
+
+env:  # workflow-level env, visible to every job
+  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+  CORE_AZURE_CS_ENDPOINT: ${{ secrets.CORE_AZURE_CS_ENDPOINT }}
+  CORE_AZURE_CS_API_KEY: ${{ secrets.CORE_AZURE_CS_API_KEY }}
+  PYTHON_VERSION: "3.8"  # read by each job's setup-python step
+
+jobs:
+  black:  # formatting gate: the unit-tests job declares `needs: black`
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Black
+        run: |
+          pip install --upgrade pip
+          pip install .[formatting]
+
+      - name: Check status  # fails the job and prints fix instructions when `black . --check` fails
+        run: |
+          if ! black . --check; then
+            git status
+            echo "###################################################################################################"
+            echo "# "
+            echo "# CHECK FAILED! Black found issues with your code formatting."
+            echo "# "
+            echo "# Either:"
+            echo "# 1. Run Black locally before committing:"
+            echo "# "
+            echo "#     pip install .[formatting]"
+            echo "#     black ."
+            echo "# "
+            echo "# 2. Install the pre-commit hook:"
+            echo "# "
+            echo "#     pre-commit install"
+            echo "# "
+            echo "# 3. See https://github.com/deepset-ai/haystack/blob/main/CONTRIBUTING.md for help."
+            echo "# "
+            echo "# If you have further problems, please open an issue: https://github.com/deepset-ai/haystack/issues"
+            echo "# "
+            echo "##################################################################################################"
+            exit 1
+          fi
+
+      - name: Calculate alert data  # maps job.status to the alert_type output used by the Datadog step
+        id: calculator  # referenced below as steps.calculator.outputs.alert_type
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'  # pass or fail (not cancelled), main only
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog  # same condition as the calculator step above
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+  unit-tests:  # runs the "unit"-marked tests under test/preview on each OS in the matrix
+    name: Unit / ${{ matrix.os }}
+    needs: black  # starts only after the black job has succeeded
+    strategy:
+      fail-fast: false  # a failure on one OS does not cancel the other matrix legs
+      matrix:
+        os:
+          - ubuntu-latest
+          - windows-latest
+          - macos-latest
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Haystack
+        run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'
+
+      - name: Run  # pytest selects tests by the "unit" marker
+        run: pytest -m "unit" test/preview
+
+      - name: Calculate alert data  # maps job.status to the alert_type output used by the Datadog step
+        id: calculator  # referenced below as steps.calculator.outputs.alert_type
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'  # pass or fail (not cancelled), main only
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog  # same condition as the calculator step above
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+
+  integration-tests-linux:  # runs the "integration"-marked tests under test/preview on Linux
+    name: Integration / ubuntu-latest
+    needs: unit-tests  # starts only after every unit-tests matrix leg has succeeded
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+    runs-on: ${{ matrix.os }}
+    services:  # Tika server as a service container, published on localhost:9998
+      tika:
+        image: apache/tika:2.9.0.0
+        ports:
+          - 9998:9998
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install dependencies  # apt-get with -y: stable CLI for scripts, never waits for a prompt
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ffmpeg  # for local Whisper tests
+
+      - name: Install Haystack
+        run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'
+
+      - name: Run  # stops after 5 failures; selects tests by the "integration" marker
+        run: pytest --maxfail=5 -m "integration" test/preview
+
+      - name: Calculate alert data  # maps job.status to the alert_type output used by the Datadog step
+        id: calculator  # referenced below as steps.calculator.outputs.alert_type
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'  # pass or fail (not cancelled), main only
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog  # same condition as the calculator step above
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+
+  integration-tests-macos:  # runs the "integration"-marked tests under test/preview on macOS
+    name: Integration / macos-latest
+    needs: unit-tests  # starts only after every unit-tests matrix leg has succeeded
+    runs-on: macos-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install dependencies  # NOTE(review): colima is started but never installed here; presumably preinstalled on the runner image - confirm
+        run: |
+          brew install ffmpeg  # for local Whisper tests
+          brew install docker
+          colima start
+
+      - name: Install Haystack
+        run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'
+
+      - name: Run Tika  # started by hand in the background; this job declares no `services` block
+        run: docker run -d -p 9998:9998 apache/tika:2.9.0.0
+
+      - name: Run  # stops after 5 failures; selects tests by the "integration" marker
+        run: pytest --maxfail=5 -m "integration" test/preview
+
+      - name: Calculate alert data  # maps job.status to the alert_type output used by the Datadog step
+        id: calculator  # referenced below as steps.calculator.outputs.alert_type
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'  # pass or fail (not cancelled), main only
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog  # same condition as the calculator step above
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+
+
+  integration-tests-windows:  # runs the "integration"-marked tests under test/preview on Windows
+    name: Integration / windows-latest
+    needs: unit-tests  # starts only after every unit-tests matrix leg has succeeded
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Install Haystack
+        run: pip install .[dev,preview] langdetect transformers[torch,sentencepiece]==4.32.1 'sentence-transformers>=2.2.0' pypdf openai-whisper tika 'azure-ai-formrecognizer>=3.2.0b2'
+
+      - name: Run  # -k 'not tika' deselects Tika tests: this job starts no Tika server
+        run: pytest --maxfail=5 -m "integration" test/preview -k 'not tika'
+
+      - name: Calculate alert data  # maps job.status to the alert_type output used by the Datadog step
+        id: calculator  # referenced below as steps.calculator.outputs.alert_type
+        shell: bash
+        if: (success() || failure()) && github.ref_name == 'main'  # pass or fail (not cancelled), main only
+        run: |
+          if [ "${{ job.status }}" = "success" ]; then
+            echo "alert_type=success" >> "$GITHUB_OUTPUT";
+          else
+            echo "alert_type=error" >> "$GITHUB_OUTPUT";
+          fi
+
+      - name: Send event to Datadog  # same condition as the calculator step above
+        if: (success() || failure()) && github.ref_name == 'main'
+        uses: masci/datadog@v1
+        with:
+          api-key: ${{ secrets.CORE_DATADOG_API_KEY }}
+          api-url: https://api.datadoghq.eu
+          events: |
+            - title: "${{ github.workflow }} workflow"
+              text: "Job ${{ github.job }} in branch ${{ github.ref_name }}"
+              alert_type: "${{ steps.calculator.outputs.alert_type }}"
+              source_type_name: "Github"
+              host: ${{ github.repository_owner }}
+              tags:
+                - "project:${{ github.repository }}"
+                - "job:${{ github.job }}"
+                - "run_id:${{ github.run_id }}"
+                - "workflow:${{ github.workflow }}"
+                - "branch:${{ github.ref_name }}"
+                - "url:https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
--- a/.github/workflows/tests_preview_skipper.yml
+++ b/.github/workflows/tests_preview_skipper.yml
@ -0,0 +1,21 @@
+# If you change this name also do it in tests_preview.yml and ci_metrics.yml
+name: Tests (Preview)
+
+on:
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+    paths-ignore:  # same patterns as `paths` in tests_preview.yml, so this is the complementary trigger
+      - "haystack/preview/**/*.py"
+      - "test/preview/**/*.py"
+
+jobs:
+  catch-all:  # NOTE(review): presumably lets a "Tests (Preview)" check report on PRs the real workflow skips - confirm
+    name: Catch-all check
+    runs-on: ubuntu-latest
+    steps:
+      - name: Skip preview tests  # no tests run here; the step only prints a message
+        run: echo "Skipped!"
--- a/.github/workflows/tests_skipper.yml
+++ b/.github/workflows/tests_skipper.yml
@ -10,6 +10,8 @@ on:
      - ready_for_review
    paths-ignore:
      - "**.py"
+      - "!haystack/preview/**/*.py"  # See tests_preview.yml
+      - "!test/preview/**/*.py"  # See tests_preview.yml
      - "pyproject.toml"
      - "!.github/**/*.py"
      - "!rest_api/**/*.py"
--- a/haystack/preview/testing/test_utils.py
+++ b/haystack/preview/testing/test_utils.py
@ -1,7 +1,10 @@
 import os
 import random
+import logging
 import numpy as np
-import torch
+
+
+logger = logging.getLogger(__name__)


 def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None:
@ -16,9 +19,16 @@ def set_all_seeds(seed: int, deterministic_cudnn: bool = False) -> None:
    """
    random.seed(seed)
    np.random.seed(seed)
-    torch.manual_seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
-    torch.cuda.manual_seed_all(seed)
-    if deterministic_cudnn:
-        torch.backends.cudnn.deterministic = True
-        torch.backends.cudnn.benchmark = False
+
+    try:
+        import torch
+
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        if deterministic_cudnn:
+            torch.backends.cudnn.deterministic = True
+            torch.backends.cudnn.benchmark = False
+
+    except (ImportError, ModuleNotFoundError) as exc:
+        logger.info("Could not set PyTorch seed because torch is not installed. Exception: %s", exc)